/* **************************************************************
NAME: yemenreweightingtable.ado 
CREATED: 7 December 2021
EDITED: 13 December 2021 
AUTHOR: callan.corcoran@kellogg.northwestern.edu
PURPOSE: Program to run bootstrapping and IPW reweighting
************************************************************** */


cap prog drop yemenreweightingtable

// yemenreweightingtable
//	For every outcome in varlist, runs numboots bootstrap replications of an
//	inverse-probability-weighted areg of the outcome on treatment, then
//	re-runs the same regression on the full analysis data and exports a CSV
//	table (esttab) whose standard errors and significance stars come from
//	the bootstrap replications.
//	Reads the globals rep_analysis, rep_data and rep_output. Replaces the
//	data in memory, clears stored estimates, writes .dta files and changes
//	the working directory.
prog def yemenreweightingtable
syntax varlist [if/], 	/// Outcome variables, one table column each (any "_end" suffix is stripped and re-added internally) 
	numboots(integer)		/// Number of bootstrap iterations to run (required) 
	[filename(string)] 		/// File name (default is TableX)
	[foldername(string)]	/// Output folder name (default is the rep_output global)
	[title(string)] 		/// Table title 
	[footnote(string)] 		/// Footnote text 
	[baselinevals] 			/// Add baseline values of variables, when applicable
	[winsorize] 			/// Use the winsorized ("_win1") versions of the variables 
	[addlcontrols(varlist)] /// Add list of additional controls 
	[fixedeffects(varlist)] /// Set fixed effects (default is village) 
	[stderrors(string)] 	/// Set standard errors (default is robust)
	[balance(varlist)]		/// Add balance tests for each variable 
	
// **************************************************************
// PREPARE TABLE INPUTS 	

// OUTPUT FOLDER
//	Fall back on the rep_output global when the caller names no folder
if ("`foldername'" == "") loc foldername "${rep_output}"

// OUTPUT FILE NAME
//	Start from "TableX" when the caller names no file, then append one
//	suffix per specification switch that is turned on
if ("`filename'" == "") loc filename "TableX"
if ("`winsorize'" != "") loc filename "`filename'_win"
if ("`baselinevals'" != "") loc filename "`filename'_blv"
if ("`addlcontrols'" != "") loc filename "`filename'_ctl"

// WINSORIZE 
//	Rebuild varlist as a list of outcome stubs: strip the "_end" suffix from
//	every name and, when "winsorize" is selected, append "_win1" to the stub.
//	Later code re-attaches "_end" / "_bsl" to these stubs.
loc templist `varlist'
loc varlist // Clear varlist before rebuilding it
foreach var in `templist' {
	loc currvar = subinstr("`var'","_end","",.) // Cut out "_end" suffix 
	if "`winsorize'"=="winsorize" {
		loc varlist `varlist' `currvar'_win1
	}
	else {
		loc varlist `varlist' `currvar'
	}
}

// ADDITIONAL CONTROLS 
//	Rebuild addlcontrols so that every control is followed by its "m_"
//	companion variable (presumably a missing-value dummy; it is not created
//	here, so it must already exist in the analysis data).
loc templist `addlcontrols'
loc addlcontrols // Clear addlcontrols before rebuilding it
foreach var in `templist' {
	if "`winsorize'"=="winsorize" {
		// Point "_bsl" controls at their winsorized "_win1_bsl" version
		loc currvar = subinstr("`var'","_bsl","_win1_bsl",.)
		loc addlcontrols `addlcontrols' `currvar' m_`currvar'
	}
	else {
		// Accumulate into addlcontrols. Assigning to varlist here would
		// overwrite the outcome list and leave the controls empty.
		loc addlcontrols `addlcontrols' `var' m_`var'
	}
}

// FIXED EFFECTS
//	Absorb village when the caller names no fixed effects
if ("`fixedeffects'" == "") loc fixedeffects village

// STANDARD ERRORS
//	Use robust standard errors when the caller names no vce type
if ("`stderrors'" == "") loc stderrors robust

// IF-CONDITION
//	Pre-build the two forms in which the caller's condition can be spliced
//	into later commands: as a qualifier of its own, or appended to another
if ("`if'" != "") {
	loc andif "& `if'"
	loc ifif "if `if'"
}

// NUMBER OF BOOTSTRAP REPETITIONS
//	Fallback of 20 repetitions for an empty numboots
if ("`numboots'" == "") loc numboots 20

// FOOTNOTES
//	Extend the caller's footnote with one sentence per active switch
if ("`winsorize'" != "") loc footnote "`footnote' Variables winsorized at 1%."
if ("`baselinevals'" != "") loc footnote "`footnote' Controls for baseline value of dependent variable."
if ("`addlcontrols'" != "") loc footnote "`footnote' Controls for additional variables."


// **************************************************************
// **************************************************************
// PART ONE

// **************************************************************
// CREATE A BASE DATASET 

	// Start clean: drop the data in memory and every stored estimate
	clear
	eststo clear
	estimates drop _all 

	use "${rep_analysis}.dta", clear

	// Number of observations (households) in each village
	bysort village: g hhinvillage = _N
	la var hhinvillage "Number of Households in Village"

	// **************************************************************
	// REPLACE MISSINGS WITH ZEROS

	// ADDITIONAL CONTROLS 
	// 	Replace missings with zeroes 
	foreach var in `addlcontrols' {
		replace `var' = 0 if missing(`var')
	}
	
	//***********************************************************
	// BASELINE CONTROLS AND OUTCOME LIST 
	// 	For every outcome stub: make sure <stub>_bsl exists and has no
	// 	missings, and collect the variable names the bootstrap needs 
	loc blvals 
	loc outcomevars 
	foreach y_var in `varlist' { 
	
		cap confirm variable `y_var'_bsl
		
		if _rc { 
			// No baseline value in the data: create constant stand-ins so
			// the names below always resolve
			gen `y_var'_bsl = 1
			gen m_`y_var'_bsl = 1
		}
		else {
			// Baseline exists: zero-fill its missings. Its m_ companion
			// is expected to exist already (it is kept below).
			replace `y_var'_bsl = 0 if missing(`y_var'_bsl)
		}
		
		local blvals `blvals' `y_var'_bsl m_`y_var'_bsl
		local outcomevars `outcomevars' `y_var'_end
	}

	//***********************************************************
	// STREAMLINE AND SAVE DATASET 
	
	// Keep only what the bootstrap regressions read. The fixed-effect
	// variables are included so that a non-default fixedeffects() option
	// still finds its variables; duplicates (e.g. village) are removed.
	// NOTE(review): variables referenced only by the if-condition or by the
	// stderrors() option (e.g. a cluster variable) are not kept - confirm
	// whether they need to be added.
	loc keepvars `outcomevars' treatment surveyed village hhinvillage `addlcontrols' `blvals' `fixedeffects'
	loc keepvars : list uniq keepvars
	keep `keepvars'
	
	save "${rep_data}/bootsdataset.dta", replace 
	
// **************************************************************
// RUN BOOTSTRAPPING 		
//	For every outcome: draw numboots village-stratified bootstrap samples
//	from bootsdataset.dta, re-estimate the IPW regression on each sample,
//	and save the treatment coefficients and standard errors to
//	bootsmat_<outcome>.dta in the rep_data directory.

	foreach y_var in `varlist' { // Loop through table variables 
	
		// One row per replication: column 1 = coefficient, column 2 = standard error
		matrix bootsmat_`y_var' = J(`numboots', 2, .)
		matrix colnames bootsmat_`y_var' = "b_hat" "se_hat"
		
		// Reset the seed before each outcome's replications (outside the
		// replication loop), so every outcome starts from the same
		// random-number state
		set seed 1

		forval b = 1/`numboots' { 
		
			// bsample replaces the data in memory, so reload every time
			use "${rep_data}/bootsdataset.dta", clear 

			//***********************************************************
			// BASELINE CONTROLS 
			cap confirm variable `y_var'_bsl
			
			if _rc { 
				// No baseline value available: create constant stand-ins
				gen `y_var'_bsl = 1
				gen m_`y_var'_bsl = 1
			}
			else {
				replace `y_var'_bsl = 0 if missing(`y_var'_bsl)
			}
			
			// Baseline controls enter the regression only on request
			if "`baselinevals'"=="baselinevals" {
				local blvals `y_var'_bsl m_`y_var'_bsl
			}
			else {
				local blvals 
			}
			
			//***********************************************************
			// RUN INVERSE PROBABILITY WEIGHTING (IPW) REGRESSION 
			
			// Drop existing inverse probability weighting variables
			cap drop ipw_temp		
			cap drop pr_temp
				
			// Resample with replacement within each village. With no size
			// expression bsample draws as many observations as each
			// stratum holds, i.e. the number of households in the village.
			bsample, strata(village)
			
			// Predicted probability of being surveyed -> weight
			logit surveyed `addlcontrols'
			predict pr_temp, pr
			g double ipw_temp = surveyed/pr_temp
			
			// IPW regression
			// NOTE(review): any variable used in the if-condition or in
			// stderrors() must be present in bootsdataset.dta - confirm.
			areg `y_var'_end treatment `blvals' `addlcontrols' `ifif' [pw=ipw_temp], absorb(`fixedeffects') vce(`stderrors')
			
			// treatment is the first regressor, so column 1 of r(table)
			// holds its results: row 1 = coefficient, row 2 = standard error
			mat def bootstable = r(table)
			matrix bootsmat_`y_var'[`b', 1] = bootstable[1, 1] 
			matrix bootsmat_`y_var'[`b', 2] = bootstable[2, 1]
			mat drop bootstable 
				
		} // end forval b 
		
		cd "${rep_data}" // Later relative paths resolve against this directory 
		
		mat list bootsmat_`y_var'
		
		// Turn the matrix into variables b_hat / se_hat and keep only those
		svmat bootsmat_`y_var', names(col)
		
		keep b_hat se_hat 
		drop if missing(b_hat)
		la var b_hat "Estimated b"
		la var se_hat "Estimated Standard Error"
		
		// Centre the bootstrap coefficients on their own mean
		sum b_hat 
		g b_hat_demean = b_hat - r(mean)
		la var b_hat_demean "De-Meaned Estimated b"
		
		g y_name = "`y_var'"
		la var y_name "Y-Variable"
		
		save "bootsmat_`y_var'.dta", replace 
			
	} // end foreach y_var
	
	
	//***********************************************************
	// STACK THE PER-OUTCOME BOOTSTRAP FILES 
	//	Paths are relative: the files are read from, and the combined file
	//	is written to, the current working directory
	
	clear 
	
	loc n_outcomes : word count `varlist'
	forval k = 1/`n_outcomes' {
		loc this_outcome : word `k' of `varlist'
		append using "bootsmat_`this_outcome'.dta"
	}
	
	save "bootsmat_all.dta", replace
	
	
// **************************************************************
// **************************************************************
// PART TWO 

// **************************************************************
// CREATE TABLE FRAMEWORK 

// Start from an empty dataset with no stored estimates
clear
eststo clear
estimates drop _all 

// Ten-row scratch dataset, used only to produce placeholder estimates
set obs 10
gen x = 1
gen y = 1

// Store one placeholder estimate (col1, col2, ...) per outcome; the table
// cells are attached to these later
loc columns: word count `varlist' 
loc c = 1
while `c' <= `columns' {
	eststo col`c': qui reg x y
	loc ++c
}

// Number the table rows: each name below receives its row position, and j
// ends up holding the number of rows
//	treatcoef	Treatment starred coefficient
//	treatse		Treatment standard error
//	contmean	Control mean
//	contsd		Control standard deviation
//	obs			Observations
//	blcontrols	Binary for baseline controls
//	numzero		Count with outcome == 0
loc j = 0
foreach cell in treatcoef treatse contmean contsd obs blcontrols numzero {
	loc ++j
	loc `cell' = `j'
}

loc stats "" 			// Added scalars to be filled
loc col_titles "" 		// Labels for columns vars to be filled

use "${rep_analysis}.dta", clear

// **************************************************************
// REPLACE MISSINGS WITH ZEROS

// ADDITIONAL CONTROLS 
// 	Zero-fill missing values of every additional control
foreach ctl of local addlcontrols {
	replace `ctl' = 0 if missing(`ctl')
}

// **************************************************************
// FILL TABLE CELLS 
//	For every outcome: run the IPW regression on the full analysis data,
//	combine its treatment coefficient with the bootstrap results saved in
//	bootsmat_<outcome>.dta, and attach the resulting cells (stat1, stat2,
//	...) to the placeholder estimate of the outcome's column.

loc i = 1 // Column counter: outcome number i is written to estimate col<i> 

foreach y_var in `varlist' { // Loop through table variables 
	
	//***********************************************************
	// BASELINE CONTROLS 
	// 	Replace missings with zeroes 
	// 	Record in the table whether baseline controls are used 

	// Check whether baseline value exists 
	cap confirm variable `y_var'_bsl
	
	// If baseline doesn't exist... 
	if _rc { 
		loc proxyflag = 1 				// Read by the balance-test section below
		loc created_var = 1					
		gen `y_var'_bsl = 1 			// Create stand-in baseline var
		gen m_`y_var'_bsl = 1 			// Create stand-in baseline dummy var
		estadd loc stat`blcontrols' = "No" : col`i' // Note that baseline controls not used
	}
	
	// If baseline exists... 
	else if !_rc {
		loc proxyflag = 0 
		loc created_var = 0
		replace `y_var'_bsl = 0 if missing(`y_var'_bsl) // Replace baseline to 0 if missing 	
		if ("`baselinevals'"=="baselinevals") estadd loc stat`blcontrols' = "Yes" : col`i' // Note baseline controls used 
		else estadd loc stat`blcontrols' = "No" : col`i' // Note baseline controls not used 
	}
	
	// Baseline controls enter the regression only when baselinevals is selected
	if "`baselinevals'"=="baselinevals" local blvals `y_var'_bsl m_`y_var'_bsl // Add to baseline variables local 
	else local blvals // Empty
		
	//***********************************************************
	// RUN INVERSE PROBABILITY WEIGHTING (IPW) REGRESSION 
	
	// Drop existing inverse probability weighting variables
	cap drop ipw_temp		
	cap drop pr_temp
		
	// Predict weight: probability of being surveyed given the additional controls 
	logit surveyed `addlcontrols' 
	
	predict pr_temp, pr
	g double ipw_temp = surveyed/pr_temp // Weight is surveyed divided by the predicted probability 
	
	// IPW regression
	areg `y_var'_end treatment `blvals' `addlcontrols' `ifif' [pw=ipw_temp], absorb(`fixedeffects') vce(`stderrors') 
	loc coef "treatment" // Name of the r(table) column read below 
	
/*
	// Main Regression (???)
	// CC to review 
	qui areg `y_var'_end treatment `blvals' `addlcontrols' `ifif', absorb(`fixedeffects') vce(`stderrors') 
	loc coef "treatment"	
		
*/
	// Save results to matrix A for later use
	mat def A = r(table)
	
	// Full-sample treatment coefficient 
	loc b_original = el(A,rownumb(A,"b"),colnumb(A,"`coef'"))
	
	//***********************************************************
	// ADD BOOTSTRAP STATISTICS 
	//	The bootstrap file is opened by relative path, i.e. from the current
	//	working directory; the analysis data are set aside and brought back
	//	with preserve / restore 
	preserve 
	
	use "bootsmat_`y_var'.dta", clear 
	
	// Replications whose de-meaned coefficient is larger in absolute value
	// than the full-sample coefficient 
	qui count if (abs(b_hat_demean) - abs(`b_original')) > 0
		
	loc tail_n = r(N)
	
	su b_hat // Displayed only; its results are not used below 
	
	// P-value: share of replications counted above 
	loc p_value = `tail_n' / _N
	
	// Standard error: standard deviation of the de-meaned bootstrap coefficients 
	sum b_hat_demean
	loc se_boostrap = r(sd)
			
	restore 
	
	//***********************************************************
	// SAVE STATISTICS
	
	// Add measure of % zero: share of e(sample) observations with outcome equal to 0 
	qui count if `y_var'_end == 0 & e(sample) == 1
	loc top = r(N)
	qui count if  e(sample) == 1
	loc bot = r(N)
	estadd loc stat`numzero' = string(`top'/`bot',"%9.2f"): col`i'

	// Add treatment coefficient and standard error	
	// Standard error (bootstrap value; the regression's own value is the commented-out alternative)
	loc se = `se_boostrap' // el(A,rownumb(A,"se"),colnumb(A,"`coef'"))
	estadd loc stat`treatse' = "(" + string(`se',"%9.2f") + ")" : col`i' //Add standard error
		
	// P-value: to get stars (bootstrap value; the regression's own value is the commented-out alternative)
	local thisp = `p_value' // el(A,rownumb(A,"pvalue"),colnumb(A,"`coef'"))
	
		if `thisp' < 0.01 {
			loc bstar "***"
		}
		else if `thisp' < 0.05 {
			loc bstar "**"
		}
		else if `thisp' < 0.1 {
			loc bstar "*"
		}
		else {
			local bstar ""
		}
	// Assign to coefficient
	loc coefficient = `b_original' // el(A,rownumb(A,"b"),colnumb(A,"`coef'"))
	estadd loc stat`treatcoef' = string(`coefficient',"%9.2f") + "`bstar'": col`i'

	// Control group mean and SD
	qui sum `y_var'_end if treatment == 0  & e(sample) == 1
	estadd loc stat`contmean' = string(`r(mean)', "%9.2f"): col`i'
	estadd loc stat`contsd' = string(`r(sd)', "%9.2f") : col`i'
		
	// Total observations
	qui sum `y_var'_end if e(sample) == 1
	estadd loc stat`obs' = string(`r(N)', "%9.0f"): col`i'
	
	// Balance Testing 
	// NOTE(review): this section runs only when the balance() option holds
	// exactly the word "balance", although the option is declared as a
	// varlist. The locals balancevar, baltest and balproxy used below are
	// not set anywhere in this program, so they expand to nothing here.
	// Confirm the intended design before relying on this section.
	if "`balance'"=="balance" {		
		if `proxyflag' { // If it does not exist... 
			// Look up a hard-coded proxy baseline variable for this outcome,
			// trying both the plain and the "_win1" name 
			foreach wins in "" "_win1" {
				if "`y_var'"=="asset_tot_value`wins'" {
					loc proxyvar asset_index`wins'_bsl 
				}
				if "`y_var'"=="inc_LS`wins'" {
					loc proxyvar ls_hany`wins'_bsl  
				}
				if "`y_var'"=="inc_nonLS`wins'" {
					loc proxyvar swf_income`wins'_bsl
				}
				if "`y_var'"=="savings_index`wins'" {
					loc proxyvar savings`wins'_bsl
				}
			}
			if "`proxyvar'"=="" { // No proxy defined for this outcome 
			    estadd loc stat`baltest' = "--": col`i'
				estadd loc stat`balproxy' = "Missing": col`i'
			}
			else if "`proxyvar'"!="" {
				// loc ballist `ballist' `proxyvar'
			    qui reg `proxyvar' `balancevar' i.village `ifif', r 
				test `balancevar' = 0 
				estadd loc stat`baltest' = string(`r(p)', "%9.2f"): col`i'
				estadd loc stat`balproxy' = "Yes": col`i'
				loc proxyvar // Clear so the next outcome starts without a proxy 
			}
		}
		
		else if !`proxyflag' { // If baseline value exists, run balance test 
			// loc ballist `ballist' `y_var'_bsl
		    qui reg `y_var'_bsl `balancevar' i.village `ifif', r 
			test `balancevar' = 0 
			estadd loc stat`baltest' = string(`r(p)', "%9.2f"): col`i'
			estadd loc stat`balproxy' = "No": col`i'
		}
		
	}
	
	loc ++i // Move on to the next table column 
	mat drop A
	
	// Column title: append the variable label of the outcome 
	loc thisvarlabel: variable label `y_var'_end // Extracts label from outcome var
	local col_titles "`col_titles' "`thisvarlabel'" "		
	
}
// end foreach y_var
	
// Collect the cell names stat1, stat2, ... (one per table row counted in j)
// for the stats() option of esttab
loc s = 1
while `s' <= `j' {
	loc stats "`stats' stat`s' "
	loc ++s
}

	
//***********************************************************
// EXPORT TABLE 

cd "`foldername'" // Switch to the output folder 

// Write every column estimate to <filename>.csv; regression cells are
// suppressed, so the table consists of the attached stat rows only
esttab col* using "`filename'.csv", ///
	title("`title'") cells(none) nonum mtitle(`col_titles') ///
	stats(`stats',labels("Treatment" " " "Control Mean" "Control SD" "Observations (Total)"  ///
	"Controls for Baseline Values" "Proportion of Obs Equal Zero" "`balword1'" "`balword2'")) ///
	note("`footnote'") compress wrap replace

end 



